home *** CD-ROM | disk | FTP | other *** search
- /*
- EPSHeader
-
- File: match.c
- Author: J. Kercheval
- Created: Sat, 01/05/1991 22:21:49
- */
- /*
- EPSRevision History
-
- J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain
- J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them)
- J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche()
- J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code
- J. Kercheval Sun, 03/10/1991 20:37:11 beef up main()
- J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain
- */
-
- /*
- Wildcard Pattern Matching
- */
-
-
- #include "match.h"
-
- int matche_after_star (register char *pattern, register char *text);
- int fast_match_after_star (register char *pattern, register char *text);
-
- /*----------------------------------------------------------------------------
- *
- * Return TRUE if PATTERN has any special wildcard characters
- *
- ----------------------------------------------------------------------------*/
-
- BOOLEAN is_pattern (char *p)
- {
- while ( *p ) {
- switch ( *p++ ) {
- case '?':
- case '*':
- case '[':
- case '\\':
- return TRUE;
- }
- }
- return FALSE;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * Return TRUE if PATTERN has is a well formed regular expression according
- * to the above syntax
- *
- * error_type is a return code based on the type of pattern error. Zero is
- * returned in error_type if the pattern is a valid one. error_type return
- * values are as follows:
- *
- * PATTERN_VALID - pattern is well formed
- * PATTERN_ESC - pattern has invalid escape ('\' at end of pattern)
- * PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-])
- * PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
- * PATTERN_EMPTY - [..] construct is empty (ie [])
- *
- ----------------------------------------------------------------------------*/
-
- BOOLEAN is_valid_pattern (char *p, int *error_type)
- {
-
- /* init error_type */
- *error_type = PATTERN_VALID;
-
- /* loop through pattern to EOS */
- while( *p ) {
-
- /* determine pattern type */
- switch( *p ) {
-
- /* check literal escape, it cannot be at end of pattern */
- case '\\':
- if( !*++p ) {
- *error_type = PATTERN_ESC;
- return FALSE;
- }
- p++;
- break;
-
- /* the [..] construct must be well formed */
- case '[':
- p++;
-
- /* if the next character is ']' then bad pattern */
- if ( *p == ']' ) {
- *error_type = PATTERN_EMPTY;
- return FALSE;
- }
-
- /* if end of pattern here then bad pattern */
- if ( !*p ) {
- *error_type = PATTERN_CLOSE;
- return FALSE;
- }
-
- /* loop to end of [..] construct */
- while( *p != ']' ) {
-
- /* check for literal escape */
- if( *p == '\\' ) {
- p++;
-
- /* if end of pattern here then bad pattern */
- if ( !*p++ ) {
- *error_type = PATTERN_ESC;
- return FALSE;
- }
- }
- else
- p++;
-
- /* if end of pattern here then bad pattern */
- if ( !*p ) {
- *error_type = PATTERN_CLOSE;
- return FALSE;
- }
-
- /* if this a range */
- if( *p == '-' ) {
-
- /* we must have an end of range */
- if ( !*++p || *p == ']' ) {
- *error_type = PATTERN_RANGE;
- return FALSE;
- }
- else {
-
- /* check for literal escape */
- if( *p == '\\' )
- p++;
-
- /* if end of pattern here then bad pattern */
- if ( !*p++ ) {
- *error_type = PATTERN_ESC;
- return FALSE;
- }
- }
- }
- }
- break;
-
- /* all other characters are valid pattern elements */
- case '*':
- case '?':
- default:
- p++; /* "normal" character */
- break;
- }
- }
-
- return TRUE;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * Match the pattern PATTERN against the string TEXT;
- *
- * returns MATCH_VALID if pattern matches, or an errorcode as follows
- * otherwise:
- *
- * MATCH_PATTERN - bad pattern
- * MATCH_LITERAL - match failure on literal mismatch
- * MATCH_RANGE - match failure on [..] construct
- * MATCH_ABORT - premature end of text string
- * MATCH_END - premature end of pattern string
- * MATCH_VALID - valid match
- *
- *
- * A match means the entire string TEXT is used up in matching.
- *
- * In the pattern string:
- * `*' matches any sequence of characters (zero or more)
- * `?' matches any character
- * [SET] matches any character in the specified set,
- * [!SET] or [^SET] matches any character not in the specified set.
- *
- * A set is composed of characters or ranges; a range looks like
- * character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
- * minimal set of characters allowed in the [..] pattern construct.
- * Other characters are allowed (ie. 8 bit characters) if your system
- * will support them.
- *
- * To suppress the special syntactic significance of any of `[]*?!^-\',
- * and match the character exactly, precede it with a `\'.
- *
- ----------------------------------------------------------------------------*/
-
- int matche ( register char *p, register char *t )
- {
- register char range_start, range_end; /* start and end in range */
-
- BOOLEAN invert; /* is this [..] or [!..] */
- BOOLEAN member_match; /* have I matched the [..] construct? */
- BOOLEAN loop; /* should I terminate? */
-
- for ( ; *p; p++, t++ ) {
-
- /* if this is the end of the text then this is the end of the match */
- if (!*t) {
- return ( *p == '*' && *++p == '\0' ) ? MATCH_VALID : MATCH_ABORT;
- }
-
- /* determine and react to pattern type */
- switch ( *p ) {
-
- /* single any character match */
- case '?':
- break;
-
- /* multiple any character match */
- case '*':
- return matche_after_star (p, t);
-
- /* [..] construct, single member/exclusion character match */
- case '[': {
-
- /* move to beginning of range */
- p++;
-
- /* check if this is a member match or exclusion match */
- invert = FALSE;
- if ( *p == '!' || *p == '^') {
- invert = TRUE;
- p++;
- }
-
- /* if closing bracket here or at range start then we have a
- malformed pattern */
- if ( *p == ']' ) {
- return MATCH_PATTERN;
- }
-
- member_match = FALSE;
- loop = TRUE;
-
- while ( loop ) {
-
- /* if end of construct then loop is done */
- if (*p == ']') {
- loop = FALSE;
- continue;
- }
-
- /* matching a '!', '^', '-', '\' or a ']' */
- if ( *p == '\\' ) {
- range_start = range_end = *++p;
- }
- else {
- range_start = range_end = *p;
- }
-
- /* if end of pattern then bad pattern (Missing ']') */
- if (!*p)
- return MATCH_PATTERN;
-
- /* check for range bar */
- if (*++p == '-') {
-
- /* get the range end */
- range_end = *++p;
-
- /* if end of pattern or construct then bad pattern */
- if (range_end == '\0' || range_end == ']')
- return MATCH_PATTERN;
-
- /* special character range end */
- if (range_end == '\\') {
- range_end = *++p;
-
- /* if end of text then we have a bad pattern */
- if (!range_end)
- return MATCH_PATTERN;
- }
-
- /* move just beyond this range */
- p++;
- }
-
- /* if the text character is in range then match found.
- make sure the range letters have the proper
- relationship to one another before comparison */
- if ( range_start < range_end ) {
- if (*t >= range_start && *t <= range_end) {
- member_match = TRUE;
- loop = FALSE;
- }
- }
- else {
- if (*t >= range_end && *t <= range_start) {
- member_match = TRUE;
- loop = FALSE;
- }
- }
- }
-
- /* if there was a match in an exclusion set then no match */
- /* if there was no match in a member set then no match */
- if ((invert && member_match) ||
- !(invert || member_match))
- return MATCH_RANGE;
-
- /* if this is not an exclusion then skip the rest of the [...]
- construct that already matched. */
- if (member_match) {
- while (*p != ']') {
-
- /* bad pattern (Missing ']') */
- if (!*p)
- return MATCH_PATTERN;
-
- /* skip exact match */
- if (*p == '\\') {
- p++;
-
- /* if end of text then we have a bad pattern */
- if (!*p)
- return MATCH_PATTERN;
- }
-
- /* move to next pattern char */
- p++;
- }
- }
-
- break;
- }
-
- /* next character is quoted and must match exactly */
- case '\\':
-
- /* move pattern pointer to quoted char and fall through */
- p++;
-
- /* if end of text then we have a bad pattern */
- if (!*p)
- return MATCH_PATTERN;
-
- /* must match this character exactly */
- default:
- if (*p != *t)
- return MATCH_LITERAL;
- }
- }
-
- /* if end of text not reached then the pattern fails */
- if ( *t )
- return MATCH_END;
- else
- return MATCH_VALID;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * recursively call matche() with final segment of PATTERN and of TEXT.
- *
- ----------------------------------------------------------------------------*/
-
- int matche_after_star (register char *p, register char *t)
- {
- register int match = 0;
- register nextp;
-
- /* pass over existing ? and * in pattern */
- while ( *p == '?' || *p == '*' ) {
-
- /* take one char for each ? and + */
- if ( *p == '?' ) {
-
- /* if end of text then no match */
- if ( !*t++ ) {
- return MATCH_ABORT;
- }
- }
-
- /* move to next char in pattern */
- p++;
- }
-
- /* if end of pattern we have matched regardless of text left */
- if ( !*p ) {
- return MATCH_VALID;
- }
-
- /* get the next character to match which must be a literal or '[' */
- nextp = *p;
- if ( nextp == '\\' ) {
- nextp = p[1];
-
- /* if end of text then we have a bad pattern */
- if (!nextp)
- return MATCH_PATTERN;
- }
-
- /* Continue until we run out of text or definite result seen */
- do {
-
- /* a precondition for matching is that the next character
- in the pattern match the next character in the text or that
- the next pattern char is the beginning of a range. Increment
- text pointer as we go here */
- if ( nextp == *t || nextp == '[' ) {
- match = matche(p, t);
- }
-
- /* if the end of text is reached then no match */
- if ( !*t++ ) match = MATCH_ABORT;
-
- } while ( match != MATCH_VALID &&
- match != MATCH_ABORT &&
- match != MATCH_PATTERN);
-
- /* return result */
- return match;
- }
-
-
- /*----------------------------------------------------------------------------
- *
- * match() is a shell to matche() to return only BOOLEAN values.
- *
- ----------------------------------------------------------------------------*/
-
- BOOLEAN match( char *p, char *t )
- {
- int error_type;
- error_type = matche(p,t);
- return (error_type == MATCH_VALID ) ? TRUE : FALSE;
- }
-
-
- #ifdef TEST
-
- /*
- * This test main expects as first arg the pattern and as second arg
- * the match string. Output is yaeh or nay on match. If nay on
- * match then the error code is parsed and written.
- */
-
- #include <stdio.h>
-
- int main(int argc, char *argv[])
- {
- int error;
- int is_valid_error;
-
- if (argc != 3) {
- printf("Usage: MATCH Pattern Text\n");
- }
- else {
- printf("Pattern: %s\n", argv[1]);
- printf("Text : %s\n", argv[2]);
-
- if (!is_pattern(argv[1])) {
- printf(" First Argument Is Not A Pattern\n");
- }
- else {
- error = matche(argv[1],argv[2]);
- is_valid_pattern(argv[1],&is_valid_error);
-
- switch ( error ) {
- case MATCH_VALID:
- printf(" Match Successful");
- if (is_valid_error != PATTERN_VALID)
- printf(" -- is_valid_pattern() is complaining\n");
- else
- printf("\n");
- break;
- case MATCH_LITERAL:
- printf(" Match Failed on Literal\n");
- break;
- case MATCH_RANGE:
- printf(" Match Failed on [..]\n");
- break;
- case MATCH_ABORT:
- printf(" Match Failed on Early Text Termination\n");
- break;
- case MATCH_END:
- printf(" Match Failed on Early Pattern Termination\n");
- break;
- case MATCH_PATTERN:
- switch ( is_valid_error ) {
- case PATTERN_VALID:
- printf(" Internal Disagreement On Pattern\n");
- break;
- case PATTERN_ESC:
- printf(" Literal Escape at End of Pattern\n");
- break;
- case PATTERN_RANGE:
- printf(" No End of Range in [..] Construct\n");
- break;
- case PATTERN_CLOSE:
- printf(" [..] Construct is Open\n");
- break;
- case PATTERN_EMPTY:
- printf(" [..] Construct is Empty\n");
- break;
- default:
- printf(" Internal Error in is_valid_pattern()\n");
- }
- break;
- default:
- printf(" Internal Error in matche()\n");
- break;
- }
- }
-
- }
- return(0);
- }
-
- #endif
-